rm(list=ls())
gc()

## 06 - ESTIMATE TSMART PID MODELS


## LOAD PACKAGES
library(data.table)
library(lfe)
library(stringr)

data = fread('panel-analysis-2016-2020.csv.gz',select = c('DemDiff','RepDiff','DemSpExpDiff_nohh', 'RepSpExpDiff_nohh','DemSpExp_nohh_year1','RepSpExp_nohh_year1','Age_year1','Race','Gender',
                                                                            'ZipCode','countyfips','Party_year1','hh.n.adj_year1','hh.n.adj_year2',
                                                                            'hh.d.adj_year1','hh.d.adj_year2','hh.r.adj_year1','hh.r.adj_year2',
                                                                            'Married_year1','State', 'WhiteBlockGroupDiff','MarriageDiff','AgeBlockGroupDiff','RegsBlockGroupDiff',
                                                                              'HHIncomeBlockGroupDiff' , 'CollegeBlockGroupDiff' , 'HomeownerBlockGroupDiff' , 'YearBuiltBlockGroupDiff' , 
                                                                              'DriveWorkBlockGroupDiff' , 'EmplBlockGroupDiff' , 'HouseValueBlockGroupDiff',
                                                                            'Party_2012','Party_2013','Party_2014','Party_2015',
                                                                            'DemSpExp_nohh_2012','DemSpExp_nohh_2013','DemSpExp_nohh_2014','DemSpExp_nohh_2015',
                                                                            'RepSpExp_nohh_2012','RepSpExp_nohh_2013','RepSpExp_nohh_2014','RepSpExp_nohh_2015'))
data[!Party_year1%in%c('Democrat','Republican'), Party_year1:='Other']
data[countyfips=='',countyfips:=NA]
data[ZipCode=='',ZipCode:=NA]
data[Gender=='',Gender:=NA]
  # Make household composition variables  
data[,hh.n.diff:=hh.n.adj_year2-hh.n.adj_year1]
data[,hh.d.diff:=hh.d.adj_year2-hh.d.adj_year1]
data[,hh.r.diff:=hh.r.adj_year2-hh.r.adj_year1]


# Make party pre-period variables
data[Party_2012=='',Party_2012:=NA]
data[Party_2013=='',Party_2013:=NA]
data[Party_2014=='',Party_2014:=NA]
data[Party_2015=='',Party_2015:=NA]


data[!Party_2012%in%c('Democrat','Republican'),Party_2012:='Non-Partisan']
data[!Party_2013%in%c('Democrat','Republican'),Party_2013:='Non-Partisan']
data[!Party_2014%in%c('Democrat','Republican'),Party_2014:='Non-Partisan']
data[!Party_2015%in%c('Democrat','Republican'),Party_2015:='Non-Partisan']



#
data = data[!is.na(DemSpExp_nohh_2012)& !is.na(DemSpExp_nohh_2013)&!is.na(DemSpExp_nohh_2014)&!is.na(DemSpExp_nohh_2015)&
            !is.na(RepSpExp_nohh_2012)& !is.na(RepSpExp_nohh_2013)&!is.na(RepSpExp_nohh_2014)&!is.na(RepSpExp_nohh_2015)&
            !is.na(Party_2012)&!is.na(Party_2012)&!is.na(Party_2013)&!is.na(Party_2014)&!is.na(Party_2015)]




# Make partisan exposure pre-period variables
data[,DemSpPre1:=round(DemSpExp_nohh_2012*100)]
data[,RepSpPre1:=round(RepSpExp_nohh_2012*100)]
data[,DemSpPre2:=round(DemSpExp_nohh_2013*100)]
data[,RepSpPre2:=round(RepSpExp_nohh_2013*100)]
data[,DemSpPre3:=round(DemSpExp_nohh_2014*100)]
data[,RepSpPre3:=round(RepSpExp_nohh_2014*100)]
data[,DemSpPre4:=round(DemSpExp_nohh_2015*100)]
data[,RepSpPre4:=round(RepSpExp_nohh_2015*100)]

gc()

  # split data
  dems = data[Party_year1=='Democrat']
  reps = data[Party_year1=='Republican']
  oths = data[Party_year1=='Other']
  
  rm(data)
  gc()
  
  # construct grouping variables based on state, year1 treatment decile, year1 household composition variables, and party and partisan exposure pretrend variables
  
  # drop rows where grouping variables from main spec are missing
  dems = dems[!is.na(Age_year1) & !is.na(Gender) & !is.na(Race) & !is.na(Married_year1) &!is.na(ZipCode)]
  reps = reps[!is.na(Age_year1) & !is.na(Gender) & !is.na(Race) & !is.na(Married_year1) & !is.na(ZipCode)]
  oths = oths[!is.na(Age_year1) & !is.na(Gender) & !is.na(Race) & !is.na(Married_year1) & !is.na(ZipCode)]
  

  # spatial seg treatment
  # democrat exposure
  dems[,DemSpDecile:=cut(DemSpExp_nohh_year1,breaks=unique(as.numeric(quantile(DemSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  reps[,DemSpDecile:=cut(DemSpExp_nohh_year1,breaks=unique(as.numeric(quantile(DemSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  oths[,DemSpDecile:=cut(DemSpExp_nohh_year1,breaks=unique(as.numeric(quantile(DemSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  
  # republican exposure
  dems[,RepSpDecile:=cut(RepSpExp_nohh_year1,breaks=unique(as.numeric(quantile(RepSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  reps[,RepSpDecile:=cut(RepSpExp_nohh_year1,breaks=unique(as.numeric(quantile(RepSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  oths[,RepSpDecile:=cut(RepSpExp_nohh_year1,breaks=unique(as.numeric(quantile(RepSpExp_nohh_year1, probs=seq(0,1,by=.1),na.rm=T))),include.lowest=T)]
  
  
  
  #####
  # make groups
  # spatial seg treatment
dems[,GroupSpDemExp:=paste( DemSpDecile,
                            DemSpPre1,DemSpPre2,DemSpPre3,DemSpPre4,
                            Party_2012, Party_2013, Party_2014, Party_2015,hh.n.adj_year1,hh.d.adj_year1, sep ='_')]
reps[,GroupSpDemExp:=paste(DemSpDecile,
                           DemSpPre1,DemSpPre2,DemSpPre3,DemSpPre4,
                           Party_2012, Party_2013, Party_2014, Party_2015,hh.n.adj_year1,hh.d.adj_year1, sep ='_')]
oths[,GroupSpDemExp:=paste(DemSpDecile,
                           DemSpPre1,DemSpPre2,DemSpPre3,DemSpPre4,
                           Party_2012, Party_2013, Party_2014, Party_2015,hh.n.adj_year1,hh.d.adj_year1, sep ='_')]

dems[,GroupSpRepExp:=paste(RepSpDecile,RepSpPre1,RepSpPre2,RepSpPre3,RepSpPre4,Party_2012, Party_2013, Party_2014, Party_2015,hh.n.adj_year1,hh.r.adj_year1, sep ='_')]
reps[,GroupSpRepExp:=paste(RepSpDecile,RepSpPre1,RepSpPre2,RepSpPre3,RepSpPre4,Party_2012, Party_2013, Party_2014, Party_2015,hh.n.adj_year1,hh.r.adj_year1, sep ='_')]
oths[,GroupSpRepExp:=paste(RepSpDecile,RepSpPre1,RepSpPre2,RepSpPre3,RepSpPre4,Party_2012, Party_2013, Party_2014, Party_2015,hh.n.adj_year1,hh.r.adj_year1, sep ='_')]

  
  ## output SDs
  
  dems.sd.DemSpExp = dems[!is.na(DemSpExp_nohh_year1),list(n=.N, mean = mean(DemSpExpDiff_nohh,na.rm=T), sd = sd(DemSpExpDiff_nohh,na.rm=T)),by='GroupSpDemExp']
  dems.sd.RepSpExp = dems[!is.na(RepSpExp_nohh_year1),list(n=.N, mean = mean(RepSpExpDiff_nohh,na.rm=T), sd = sd(RepSpExpDiff_nohh,na.rm=T)),by='GroupSpRepExp']
  
  reps.sd.DemSpExp = reps[!is.na(DemSpExp_nohh_year1),list(n=.N, mean = mean(DemSpExpDiff_nohh,na.rm=T), sd = sd(DemSpExpDiff_nohh,na.rm=T)),by='GroupSpDemExp']
  reps.sd.RepSpExp = reps[!is.na(RepSpExp_nohh_year1),list(n=.N, mean = mean(RepSpExpDiff_nohh,na.rm=T), sd = sd(RepSpExpDiff_nohh,na.rm=T)),by='GroupSpRepExp']
  
  oths.sd.DemSpExp = oths[!is.na(DemSpExp_nohh_year1),list(n=.N, mean = mean(DemSpExpDiff_nohh,na.rm=T), sd = sd(DemSpExpDiff_nohh,na.rm=T)),by='GroupSpDemExp']
  oths.sd.RepSpExp = oths[!is.na(RepSpExp_nohh_year1),list(n=.N, mean = mean(RepSpExpDiff_nohh,na.rm=T), sd = sd(RepSpExpDiff_nohh,na.rm=T)),by='GroupSpRepExp']
  
  
  out = list(dems.sd.DemSpExp, dems.sd.RepSpExp, reps.sd.DemSpExp,reps.sd.RepSpExp, oths.sd.DemSpExp, oths.sd.RepSpExp)
  
names(out) = c('Democrats - Dem Exp',
               'Democrats - Rep Exp',
               'Republicans - Dem Exp',
               'Republicans - Rep Exp',
               'Non-partisans - Dem Exp',
               'Non-partisans - Rep Exp')
save(out, file = paste0('results/sds/sds-2016-2020-nohh-pretrend.Rdata'))


#   ###
  # estimate models
  

  ModelDemSpExpDems = summary(felm(DemDiff ~ DemSpExpDiff_nohh + hh.d.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpDemExp|0|countyfips, data = dems[!is.na(DemSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)
  ModelDemSpExpReps = summary(felm(DemDiff ~ DemSpExpDiff_nohh+ hh.d.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpDemExp|0|countyfips, data = reps[!is.na(DemSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)
  ModelDemSpExpOths = summary(felm(DemDiff ~ DemSpExpDiff_nohh+ hh.d.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpDemExp|0|countyfips, data = oths[!is.na(DemSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)
  
  ModelRepSpExpDems = summary(felm(RepDiff ~ RepSpExpDiff_nohh+ hh.r.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpRepExp|0|countyfips, data = dems[!is.na(RepSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)
  ModelRepSpExpReps = summary(felm(RepDiff ~ RepSpExpDiff_nohh+ hh.r.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpRepExp|0|countyfips, data = reps[!is.na(RepSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)
  ModelRepSpExpOths = summary(felm(RepDiff ~ RepSpExpDiff_nohh+ hh.r.diff+hh.n.diff+WhiteBlockGroupDiff+MarriageDiff+AgeBlockGroupDiff+RegsBlockGroupDiff+
                                     HHIncomeBlockGroupDiff + CollegeBlockGroupDiff + HomeownerBlockGroupDiff + YearBuiltBlockGroupDiff + 
                                     DriveWorkBlockGroupDiff + EmplBlockGroupDiff + HouseValueBlockGroupDiff|GroupSpRepExp|0|countyfips, data = oths[!is.na(RepSpExp_nohh_year1)&!is.na(countyfips)]), robust =T)
  
  
  
  save(ModelDemSpExpDems, ModelDemSpExpReps, ModelDemSpExpOths, 
       ModelRepSpExpDems, ModelRepSpExpReps, ModelRepSpExpOths,
       
       
       file = paste0('results/current-results-2016-2020-pretrend.Rdata'))
  
